% =========== Leave One Patient Out (LOPO) Cross Validation ==============
% Data-Driven Identification of Prognostic Tumor Subpopulations Using 
% Spatially Mapped t-SNE of Mass Spectrometry Imaging Data
% ============================ About LOPO =================================
% This file may take a long time to finish the LOPO analysis for all patients.
% A patient is excluded and the entire pipeline is applied to the remaining
% patients; the excluded patient is then used for clinical outcome prediction
% (i.e. to be assigned to one of the survival groups). Note that this is a
% double-loop cross validation in which the left-out patient is unseen during
% feature extraction (i.e. the steps of tSNE, discretization and SAM analysis)
% and classifier training. LOPO prevents any information leakage during the
% cross validation, and thus the final result is not biased.
% Note: Some parts of this pipeline use an R implementation. Please install R and a
% proper toolbox to connect R and Matlab.
% Note: Please run this file (LOPO.0 to LOPO.6) cell-by-cell (Ctrl+Enter).
% Also note that step LOPO.5 requires manual interaction: you have to run the
% R implementation in the file "SAM_BreastCancer_R.txt" to get the significant
% m/z features (look first at SAM_BreastCancer_R.txt; if needed, change the
% value of the parameter "delta").
% ========================================================================
% Written By Walid M. Abdelmoula, LUMC, NL

% Session setup: reset the workspace, load the dataset file and put the helper
% folders on the MATLAB search path.
% NOTE(review): MSI_data_cube, Clinical_data, goodlist, pixel_to_sample_ID and
% PatientID are used below without being defined in this file -- presumably
% they come from breast_cancer_dataset.mat; confirm.
clear all; clc;
load('../breast_cancer_dataset.mat');
% Folders are added one call at a time, in the original order, so the
% resulting search-path order is unchanged.
for Helper_Folder = {'../../Matlab_Files/Functions/', ...
                     '../../Matlab_Files/Functions/kaplanmeier_plot/', ...
                     '../../Matlab_Files/Functions/FisherExactTest/', ...
                     '../../Matlab_Files/RviaMatlab/'}
    addpath(Helper_Folder{1});
end
clear Helper_Folder

%% LOPO.0 Load Data:
% Unfold the 3-D data cube into a 2-D matrix with one row per pixel
% (S(1)*S(2) rows) and one column per entry along the third dimension, then
% keep only the rows whose goodlist entry is positive.
S = size(MSI_data_cube);
N_Masses = S(3);
N_Patients = length(Clinical_data);
Folded_Measurements = reshape(MSI_data_cube, S(1)*S(2), N_Masses);
indx = find(goodlist > 0);                  % linear indices of the selected pixels
Tumors_All = Folded_Measurements(indx, :);  % measurements of the selected pixels only
%% LOPO.1 Dimensionality Reduction: Leave one patient out and build a new tSNE
% For every patient: drop that patient's pixels, embed the remaining pixels in
% 3-D with fast_tsne_seed, and save the embedding to its own .mat file.
%
% Fixes in this cell:
%   * the output path is built with fullfile instead of a hard-coded '\', so
%     the file lands inside the output folder on every platform;
%   * the output folder is created when it does not exist yet (save would
%     otherwise fail);
%   * the loop-invariant sample-ID lookup is computed once before the loop;
%   * the duplicated variable name in the clear statement is removed.
tSNE_Dir = 'tSNE_Maps_ Leave_One_Patient'; % folder name kept exactly as in the original path string
if ~exist(tSNE_Dir, 'dir')
    mkdir(tSNE_Dir);
end
Tumor_Sample_IDs = pixel_to_sample_ID(goodlist>0); % sample ID of every selected pixel
for i = 1:N_Patients
    SelectedPatient = i;
    % NOTE(review): pixels are excluded by the loop index i, whereas the file
    % is named after PatientID(i); cell LOPO1.1 uses a single value for both
    % purposes. The two cells agree only if PatientID(i) == i -- confirm.
    ii = find(Tumor_Sample_IDs ~= SelectedPatient); %exclude this patient
    Tumors_PatientExcluded = Tumors_All(ii,:);
    % Two passes: the second call receives the first embedding as its last
    % argument (presumably as the initial solution -- confirm in fast_tsne_seed).
    mappedX_3D_PatientExc = fast_tsne_seed(Tumors_PatientExcluded,3);
    mappedX2_3D_PatientExc = fast_tsne_seed(Tumors_PatientExcluded, 3, [], [], [], [], mappedX_3D_PatientExc);
    File_Name = fullfile(tSNE_Dir, ['LeavePatient_', num2str(PatientID(i)), '.mat']);
    save(File_Name,'mappedX2_3D_PatientExc');
    % Release both embeddings before the next iteration
    clear mappedX_3D_PatientExc mappedX2_3D_PatientExc
end

%% LOPO1.1 Load tSNE maps reconstructed from excluding one patient
% Loads the embedding saved for one excluded patient and renders it as a
% spatial image. The path is built exactly as in LOPO.1 so that save and load
% always point at the same file.
SelectedPatient = 28; %ID of the excluded patient
tSNE_Dir = 'tSNE_Maps_ Leave_One_Patient'; % redefined here so this cell can be run on its own
File_Name = fullfile(tSNE_Dir, ['LeavePatient_', num2str(SelectedPatient), '.mat']);
load(File_Name);
% Despite its name, this holds positions (within the selected-pixel list) of
% the pixels that do NOT belong to the excluded patient, not sample IDs.
pixel_to_sample_ID_New = find(pixel_to_sample_ID(goodlist>0)~=SelectedPatient);
labc_PatientOut = embedding2LABcolormap(mappedX2_3D_PatientExc);
% figure,scatter3(mappedX2_3D_PatientExc(:,1),mappedX2_3D_PatientExc(:,2),mappedX2_3D_PatientExc(:,3),3,labc_PatientOut); grid off
indx_New = indx(pixel_to_sample_ID_New); % linear image indices of the retained pixels
% tSNE spatial image
tSNE_SegmentationMap_OnePatientOut = Visualize_tSNE_2DImage(labc_PatientOut,S(1),S(2),indx_New);
%% LOPO.2 Find the optimal number of clusters from the tSNE Space using Bisecting Kmeans
[LOU_K_Clusters, LOU_Corr_Values, LOU_IDX, LOU_C] = Optimal_NumberClusters(mappedX2_3D_PatientExc, S, indx_New);
% Rank the correlation values from highest to lowest
[Ranked_Correlation, LOUT_Ranked_Index] = sort(LOU_Corr_Values, 'descend');
%% Select the Ranked peak( 1st, 2nd,...etc)
% Same ranking as computed in LOPO.2; reused instead of sorting a second time
Ranked_Index = LOUT_Ranked_Index;
Rank_Order = 3;      % which ranked peak to pick (1 = highest correlation)
opt = [1E-6 1 1];    % not used in this file; presumably read by a later script -- confirm
% Left unterminated on purpose so the selected value is echoed to the console.
% NOTE(review): this is a position inside LOU_Corr_Values; it equals a cluster
% count only if that vector is indexed by K starting at 1 -- confirm.
K_ranked = Ranked_Index(Rank_Order)
%% LOPO.3 Link to CLinical Data
% Sample ID of every selected pixel. The mask is goodlist > 0 -- the same mask
% used to build indx and indx_New -- so that New_RegionOverview stays
% element-for-element aligned with indx_New. (The previous goodlist == 1 mask
% gives a different pixel set as soon as goodlist holds a positive value
% other than 1.)
Patient_Tissues = pixel_to_sample_ID(goodlist > 0);
New_RegionOverview = Patient_Tissues(Patient_Tissues~=SelectedPatient); %Exclude this selected patient (PatientID)
% Cluster the embedding into K_ranked groups and relate them to the clinical data.
% NOTE(review): output names suggest per-cluster counts for the pN0 / pN1
% groups plus raw and thresholded cluster labels -- confirm in InvestigateMetastasis.
[N_Subpop_pN0 ,N_Subpop_pN1, LOUT_IDXs, LOUT_IDXs_Threshold,Compact_Subpopulations] = InvestigateMetastasis(mappedX2_3D_PatientExc,K_ranked,S,indx_New, New_RegionOverview,Clinical_data,goodlist);
%% ===== Visualize the binary distribution of a selected  metastatic associated cluster
% i.e. color the entire tSNE map black, except for the points that belong to
% the selected cluster, which are colored red.
clusterID = 5; % To select this clusterID, look at (N_Subpop_pN0 ,N_Subpop_pN1)
Ln = length(LOUT_IDXs);
% Spatial image of the binary mask "pixel belongs to clusterID"
tSNE_SegmentationMapX_LOUT = Visualize_tSNE_2DImage(LOUT_IDXs==clusterID, S(1), S(2), indx_New);
close; % closes the current figure (presumably the one opened by the call above)

% Two-level color code: 10 marks the selected cluster, 100 marks everything else
ii = find(LOUT_IDXs == clusterID);
Labelsn_Colors = repmat(100, 1, Ln);
Labelsn_Colors(ii) = 10;

% Two-entry colormap: first row red (low value = selected cluster), second row black
RGB_COLORS_OfClusters = [0.9 0 0; 0 0 0];
figure;
scatter3(mappedX2_3D_PatientExc(:,1), mappedX2_3D_PatientExc(:,2), mappedX2_3D_PatientExc(:,3), 3, Labelsn_Colors);
grid off
colormap(RGB_COLORS_OfClusters);
%% LOPO.4 Investigate Statistical Significance: Fisher Exact test:
% Runs FisherExactTest on the two outputs of LOPO.3. The statement is left
% unterminated on purpose so that all three results are echoed to the console.
[Sig, PValue, ContigenMatrix] = FisherExactTest(N_Subpop_pN0, N_Subpop_pN1)
%% LOPO.5 SAM: Cluster_ID: represents the tumor subpopulations we are interested to retrieve its prognostic signature 
% Prepare workspace variables and then run the SAM_Breast script.
% NOTE(review): SAM_Breast is invoked without arguments, so it presumably
% reads the variables set below directly from the workspace -- confirm.
clear msdata_average_CombinedSubpop
Only_Metas_Sub = 5; %This is the ID of metastatic exclusive cluster.Look at (N_Subpop_pN0 ,N_Subpop_pN1)
MultiClass_MultiLabeling = 0;
SelectedSubpop = unique(LOUT_IDXs);   % distinct cluster labels
IDXs_Values = LOUT_IDXs_Threshold;    % thresholded labels from LOPO.3
indxoo = indx_New;                    % image indices of the retained pixels
SAM_Breast
% Note 1: If the list of significant m/z values does not appear in the Matlab
% console, run the file "SAM_BreastCancer_R.txt" in R -- but only after
% "SAM_Breast" has been run once.
% Note 2: Make sure to save the significant m/z values produced by SAM; this
% has to be repeated for each excluded patient (e.g. assume you save them in
% a file called SAM_LOPO.xlsx).
%% LOPO.6 Classification: To predict that excluded patient
% Collapse the cluster labels into two classes (2 = selected metastatic
% cluster, 1 = all other clusters) and run the Classification_LOUT script.
% NOTE(review): Mzs is hard-coded; presumably it is the significant m/z list
% obtained from SAM in LOPO.5 for this excluded patient -- confirm.
Mzs = [4965,4999,5171,5067,6980,9265,6650,6277,8568,7009,8603,6224];
clear Percentage_TestLabels_ALL
Only_Metas_Sub = 5; %This is the ID of metastatic exclusive cluster.Look at (N_Subpop_pN0 ,N_Subpop_pN1)
% Build a binary labeling based on the identified metastasis group.
% Both index sets are taken from the untouched labels, so the order of the
% two assignments below does not matter.
mi = find(LOUT_IDXs == Only_Metas_Sub);
Not_mi = find(LOUT_IDXs ~= Only_Metas_Sub);
NewLabels_IDXs = LOUT_IDXs;
NewLabels_IDXs(Not_mi) = 1;
NewLabels_IDXs(mi) = 2; % Metastasis labeled as 2
Classification_LOUT